# Convert tzdata source into vanguard or rearguard form.

# Contributed by Paul Eggert.  This file is in the public domain.

# This is not a general-purpose converter; it is designed for current tzdata.
# It just converts from current source to main, vanguard, and rearguard forms.
# Although it might be nice for it to be idempotent, or to be useful
# for converting back and forth between vanguard and rearguard formats,
# it does not do these nonessential tasks now.
#
# Although main and vanguard forms are currently equivalent,
# this need not always be the case.  When the two forms differ,
# this script can convert either from main to vanguard form (needed then),
# or from vanguard to main form (this conversion would be needed later,
# after main became rearguard and vanguard became main).
# There is no need to convert rearguard to other forms.
#
# When converting to vanguard form, the output can use the line
# "Zone GMT 0 - GMT" which TZUpdater 2.3.2 mistakenly rejects.
#
# When converting to vanguard form, the output can use negative SAVE
# values.
#
# When converting to rearguard form, the output uses only nonnegative
# SAVE values.  The idea is for the output data to simulate the behavior
# of the input data as best it can within the constraints of the
# rearguard format.
#
# Variables read by this script (expected to be set by the invoker):
#   DATAFORM     - "main", "vanguard", or "rearguard"; anything else
#                  makes the BEGIN rule exit with status 1.
#   PACKRATLIST  - if nonempty, name of a file whose non-comment lines
#                  carry a zone name in their third field.
#   PACKRATDATA  - file name compared against FILENAME; Zones in that file
#                  that are not in PACKRATLIST are commented out.
#   ENVIRON["VANGUARD_SUBSECONDS"] - see the #STDOFF handling below.

# Given a FIELD like "-0:30", return a minute count like -30.
# SIGN, HOURS and MINUTES are locals.
function get_minutes(field, \
                     sign, hours, minutes)
{
  # The sign is taken from the text, because "-0" converts to zero
  # and would otherwise lose the sign of the minutes part.
  sign = field ~ /^-/ ? -1 : 1
  hours = +field
  if (field ~ /:/) {
    minutes = field
    sub(/[^:]*:/, "", minutes)
  }
  return 60 * hours + sign * minutes
}

# Given an OFFSET, which is a minute count like 300 or 330,
# return a %z-style abbreviation like "+05" or "+0530".
# HOURS and MINUTES are locals.
function offset_abbr(offset, \
                     hours, minutes)
{
  hours = int(offset / 60)
  minutes = offset % 60
  if (minutes) {
    return sprintf("%+.4d", hours * 100 + minutes)
  } else {
    return sprintf("%+.2d", hours)
  }
}

# Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second.
# A TIMESTAMP without a fractional part is returned unchanged.
# Exact halves are rounded to the even second.
function round_to_second(timestamp, \
                         hh, mm, ss, seconds, dot_dddd, subseconds)
{
  dot_dddd = timestamp
  if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
    return timestamp
  hh = mm = ss = timestamp
  sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
  sub(/^[-+]?[0-9]+:/, "", mm)
  sub(/^[-+]?/, "", hh)
  # SECONDS is the magnitude; the sign is re-attached when formatting.
  seconds = 3600 * hh + 60 * mm + ss
  subseconds = +dot_dddd
  seconds += 0.5 < subseconds || ((subseconds == 0.5) && (seconds % 2))
  return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
                 seconds / 3600, seconds / 60 % 60, seconds % 60)
}

BEGIN {
  dataform_type["vanguard"] = 1
  dataform_type["main"] = 1
  dataform_type["rearguard"] = 1

  if (PACKRATLIST) {
    # Compare against 0 explicitly: getline yields -1 on a read error,
    # which is "true" and would otherwise loop forever.
    while ((getline <PACKRATLIST) > 0) {
      if ($0 ~ /^#/) continue
      packratlist[$3] = 1
    }
  }

  # The command line should set DATAFORM.
  if (!dataform_type[DATAFORM]) exit 1
}

# Activate lines that are tagged for the packrat list currently in use,
# by stripping the "#PACKRATLIST <name>" prefix.
$1 == "#PACKRATLIST" && $2 == PACKRATLIST {
  sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")
}

# Remember the most recent Zone name for the continuation-line checks below.
/^Zone/ { zone = $2 }

DATAFORM != "main" {
  in_comment = $0 ~ /^#/
  uncomment = comment_out = 0

  # If this line should differ due to Czechoslovakia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  if (zone == "Europe/Prague" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
      && $0 ~ /1947 Feb 23/) {
    if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Ireland using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Eire = $0 ~ /^#?Rule[\t ]+Eire[\t ]/
  Zone_Dublin_post_1968 \
    = (zone == "Europe/Dublin" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
       && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
  if (Rule_Eire || Zone_Dublin_post_1968) {
    if ((Rule_Eire \
         || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
        == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Namibia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Namibia = $0 ~ /^#?Rule[\t ]+Namibia[\t ]/
  Zone_using_Namibia_rule \
    = (zone == "Africa/Windhoek" && $0 ~ /^#?[\t ]+[12]:00[\t ]/ \
       && ($(in_comment + 2) == "Namibia" \
           || ($(in_comment + 2) == "-" && $(in_comment + 3) == "CAT" \
               && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
                   || in_comment + 3 == NF))))
  if (Rule_Namibia || Zone_using_Namibia_rule) {
    if ((Rule_Namibia \
         ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
         : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
        == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Portugal benefiting from %z if supported,
  # uncomment the desired version and comment out the undesired one.
  if ($0 ~ /^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) {
    if (($0 ~ /%z/) == (DATAFORM == "vanguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # In vanguard form, use the line "Zone GMT 0 - GMT" instead of
  # "Zone Etc/GMT 0 - GMT" and adjust Link lines accordingly.
  # This works around a bug in TZUpdater 2.3.2.
  if (/^#?(Zone|Link)[\t ]+(Etc\/)?GMT[\t ]/) {
    if (($2 == "GMT") == (DATAFORM == "vanguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  if (uncomment) {
    sub(/^#/, "")
  }
  if (comment_out) {
    sub(/^/, "#")
  }

  # Prefer %z in vanguard form, explicit abbreviations otherwise.
  if (DATAFORM == "vanguard") {
    # Tag a numeric FORMAT column with a marker, undo the tagging for "-00",
    # then drop the numeric abbreviation so that only "%z" remains.
    sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
        "&CHANGE-TO-%z")
    sub(/-00CHANGE-TO-%z/, "-00")
    sub(/[-+][^\t ]+CHANGE-TO-/, "")
  } else {
    if ($0 ~ /^[^#]*%z/) {
      # On a "Zone" line the STDOFF column is field 3; on a continuation
      # line it is field 1.
      stdoff_column = 2 * ($0 ~ /^Zone/) + 1
      rules_column = stdoff_column + 1
      stdoff = get_minutes($stdoff_column)
      rules = $rules_column
      stdabbr = offset_abbr(stdoff)
      if (rules == "-") {
        abbr = stdabbr
      } else {
        dstabbr_only = rules ~ /^[+0-9-]/
        if (dstabbr_only) {
          dstoff = get_minutes(rules)
        } else {
          # The DST offset is normally an hour, but there are special cases.
          if (rules == "Morocco" && NF == 3) {
            dstoff = -60
          } else if (rules == "NBorneo") {
            dstoff = 20
          } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
                     || (rules == "Uruguay" \
                         && $0 ~ /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
            dstoff = 30
          } else if (rules == "Uruguay" && $0 ~ /[\t ]1974 Mar 10$/) {
            dstoff = 90
          } else {
            dstoff = 60
          }
        }
        dstabbr = offset_abbr(stdoff + dstoff)
        if (dstabbr_only) {
          abbr = dstabbr
        } else {
          abbr = stdabbr "/" dstabbr
        }
      }
      sub(/%z/, abbr)
    }
  }

  # Normally, prefer whole seconds.  However, prefer subseconds
  # if generating vanguard form and the otherwise-undocumented
  # VANGUARD_SUBSECONDS environment variable is set.
  # This relies on #STDOFF comment lines in the data.
  # It is for hypothetical clients that support UT offsets that are
  # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
  # No known clients need this currently, and this experimental
  # feature may be changed or withdrawn in future releases.
  if ($1 == "#STDOFF") {
    stdoff = $2
    rounded_stdoff = round_to_second(stdoff)
    # stdoff_subst[0] is the text to look for, stdoff_subst[1] its replacement.
    if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
      stdoff_subst[0] = rounded_stdoff
      stdoff_subst[1] = stdoff
    } else {
      stdoff_subst[0] = stdoff
      stdoff_subst[1] = rounded_stdoff
    }
  } else if (stdoff_subst[0]) {
    stdoff_column = 2 * ($0 ~ /^Zone/) + 1
    stdoff_column_val = $stdoff_column
    if (stdoff_column_val == stdoff_subst[0]) {
      sub(stdoff_subst[0], stdoff_subst[1])
    } else if (stdoff_column_val != stdoff_subst[1]) {
      # A line with some other STDOFF ends the scope of the #STDOFF comment.
      stdoff_subst[0] = 0
    }
  }

  # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
  # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
  if ($0 ~ /^Rule/ && $2 == "Japan") {
    if (DATAFORM == "rearguard") {
      if ($7 == "Sat>=8" && $8 == "25:00") {
        sub(/Sat>=8/, "Sun>=9")
        sub(/25:00/, " 1:00")
      }
    } else {
      if ($7 == "Sun>=9" && $8 == "1:00") {
        sub(/Sun>=9/, "Sat>=8")
        sub(/ 1:00/, "25:00")
      }
    }
  }

  # In rearguard form, change the Morocco lines with negative SAVE values
  # to use positive SAVE values.
  if ($2 == "Morocco") {
    if ($0 ~ /^Rule/) {
      if ($4 ~ /^201[78]$/ && $6 == "Oct") {
        if (DATAFORM == "rearguard") {
          sub(/\t2018\t/, "\t2017\t")
        } else {
          sub(/\t2017\t/, "\t2018\t")
        }
      }

      if (2019 <= $3) {
        if ($8 == "2:00") {
          if (DATAFORM == "rearguard") {
            sub(/\t0\t/, "\t1:00\t")
          } else {
            sub(/\t1:00\t/, "\t0\t")
          }
        } else {
          if (DATAFORM == "rearguard") {
            sub(/\t-1:00\t/, "\t0\t")
          } else {
            sub(/\t0\t/, "\t-1:00\t")
          }
        }
      }
    }
    if ($1 ~ /^[+0-9-]/ && NF == 3) {
      if (DATAFORM == "rearguard") {
        sub(/1:00\tMorocco/, "0:00\tMorocco")
        sub(/\t\+01\/\+00$/, "\t+00/+01")
      } else {
        sub(/0:00\tMorocco/, "1:00\tMorocco")
        # Both plus signs must be escaped; an unescaped "+" would act as a
        # repetition operator on the slash and never match "+00/+01".
        sub(/\t\+00\/\+01$/, "\t+01/+00")
      }
    }
  }
}

# Within PACKRATDATA, when a packrat list is in use, comment out every
# line of a Zone that is not in the list, except for Rule lines.
/^Zone/ {
  packrat_ignored = FILENAME == PACKRATDATA && PACKRATLIST && !packratlist[$2]
}
{
  if (packrat_ignored && $0 !~ /^Rule/) {
    sub(/^/, "#")
  }
}

# Return a link line resulting from changing OLDLINE to link to TARGET
# from LINKNAME, instead of linking to OLDTARGET from LINKNAME.
# Align data columns the same as they were in OLDLINE.
# Also, replace any existing white space followed by comment with COMMENT.
function make_linkline(oldline, target, linkname, oldtarget, comment, \
                       head, headlen, tail, oldstops, newstops)
{
  # HEAD is what OLDLINE starts with when it is in the usual
  # tab-separated "Link<TAB>OLDTARGET<TAB>..." layout.
  head = "Link\t" oldtarget "\t"
  headlen = length(head)

  if (substr(oldline, 1, headlen) != head) {
    # Odd format line; don't bother lining up its replacement nicely.
    return "Link\t" target "\t" linkname comment
  }

  # Keep whatever followed the old target (extra tabs plus the link name),
  # minus any trailing comment, so LINKNAME stays in its column.
  tail = substr(oldline, headlen + 1)
  sub(/[\t ]*#.*/, "", tail)

  # Number of 8-column tab stops taken up by the old and new targets.
  oldstops = int(length(oldtarget) / 8) + 1
  newstops = int(length(target) / 8) + 1

  # A shorter new target needs extra tabs to reach the same column ...
  while (newstops < oldstops) {
    tail = "\t" tail
    newstops++
  }
  # ... and a longer one gives up leading tabs, as far as any are available.
  while (oldstops < newstops && tail ~ /^\t/) {
    tail = substr(tail, 2)
    newstops--
  }

  return "Link\t" target "\t" tail comment
}

# In vanguard form, a line "Link T LINKNAME #= TARGET" is turned back
# into a direct link to TARGET; the "#= TARGET" comment is dropped.
DATAFORM == "vanguard" && /^Link/ && $4 == "#=" {
  $0 = make_linkline($0, $5, $3, $2)
}

# If a Link line is followed by a Link or Zone line for the same data, comment
# out the Link line.  This can happen if backzone overrides a Link
# with a Zone or a different Link.
# Bookkeeping for Link lines:
#   line[NR]         - the (possibly edited) text of every input record
#   linkline[NAME]   - record number of the latest Link line defining NAME
#   linktarget[NAME] - the target of that Link line
# A later Zone or Link for the same name comments out the earlier Link.
/^Zone/ {
  # Test membership first so that a lookup does not create empty
  # linkline[] and line[] entries for names that were never links.
  if ($2 in linkline) {
    sub(/^Link/, "#Link", line[linkline[$2]])
  }
}
/^Link/ {
  if ($3 in linkline) {
    sub(/^Link/, "#Link", line[linkline[$3]])
  }
  linkline[$3] = NR
  linktarget[$3] = $2
}

{ line[NR] = $0 }

# Rewrite every recorded Link whose target is itself a link name so that
# it points directly at the end of its chain.
# L, LINKNAME, T, TARGET, U and HOPS are locals.
function cut_link_chains_short( \
                               l, linkname, t, target, u, hops)
{
  for (linkname in linktarget) {
    target = linktarget[linkname]
    # Use "in" rather than a bare subscript: a bare reference would add
    # elements to linktarget while it is being traversed, and the effect
    # of that on the traversal is unspecified.
    t = (target in linktarget) ? linktarget[target] : ""
    if (t) {
      # TARGET is itself a link name.  Replace the line "Link TARGET LINKNAME"
      # with "Link T LINKNAME #= TARGET", where T is at the end of the chain
      # of links that LINKNAME points to.
      # A chain cannot be longer than the number of records, so the hop
      # bound only matters for (invalid) cyclic links, where it prevents
      # an endless loop.
      hops = 0
      while ((t in linktarget) && (u = linktarget[t]) && ++hops <= NR) {
        t = u
      }
      l = linkline[linkname]
      line[l] = make_linkline(line[l], t, linkname, target, "\t#= " target)
    }
  }
}

# Emit all buffered records.  Link chains are shortened in every form
# except vanguard.
END {
  if (DATAFORM != "vanguard") {
    cut_link_chains_short()
  }
  for (i = 1; i <= NR; i++)
    print line[i]
}