# Convert tzdata source into a smaller version of itself.

# Contributed by Paul Eggert.  This file is in the public domain.

# This is not a general-purpose converter; it is designed for current tzdata.
# 'zic' should treat this script's output as if it were identical to
# this script's input.

# Global state shared by the functions below:
#   n_rule_names  number of abbreviated rule names generated so far
#   rule[NAME]    abbreviated name already assigned to rule NAME
#   output_line[] saved output lines, indexed 0 .. nout-1
#   nout          number of saved output lines
#   zonedef[NAME] 1 + index in output_line[] of the line that starts the
#                 most recent Zone or Link definition of NAME


# Return a new rule name.
# N_RULE_NAMES keeps track of how many rule names have been generated.
# Every parameter is a local variable; call this function with no arguments.
#
# Names are built from the characters of ALPHABET, shortest names first.
# The alphabet omits digits, '-' and '+': process_input_line recognizes a
# rule name by the fact that it does not start with one of those characters,
# so a generated name must never start with one either.

function gen_rule_name(alphabet, base, rule_name, n, digit)
{
  alphabet = ""
  alphabet = alphabet "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  alphabet = alphabet "abcdefghijklmnopqrstuvwxyz"
  alphabet = alphabet "!$%&'()*,./:;<=>?@[\\]^_`{|}~"
  base = length(alphabet)
  rule_name = ""
  n = n_rule_names++

  do {
    # For every character after the first (rightmost) one, step N down
    # by one when it is small enough, so that multi-character names
    # begin at the first alphabet character instead of skipping it.
    n -= (rule_name != "" && n <= base)
    digit = n % base
    rule_name = substr(alphabet, digit + 1, 1) rule_name
    n = (n - digit) / base
  } while (n);

  return rule_name
}

# Process an input line and save it for later output.
# LINE is the raw input line; the remaining parameters are local variables.
#
# White space and letters are matched with explicit bracket expressions
# rather than POSIX character classes such as [[:space:]], because some
# awk implementations do not support the latter.

function process_input_line(line, field, end, i, n, startdef)
{
  # Remove comments, normalize spaces, and append a space to each line.
  sub(/#.*/, "", line)
  line = line " "
  gsub(/[ \t\n\v\f\r]+/, " ", line)

  # Abbreviate keywords.  Do not abbreviate "Link" to just "L",
  # as pre-2017c zic erroneously diagnoses "Li" as ambiguous.
  sub(/^Link /, "Li ", line)
  sub(/^Rule /, "R ", line)
  sub(/^Zone /, "Z ", line)

  # SystemV rules are not needed.
  if (line ~ /^R SystemV /) return

  # Replace FooAsia rules with the same rules without "Asia", as they
  # are duplicates.
  if (match(line, /[^ ]Asia /)) {
    if (line ~ /^R /) return
    line = substr(line, 1, RSTART) substr(line, RSTART + 5)
  }

  # Abbreviate times: first drop redundant leading zeros of each
  # component, then drop ":0" components that are not followed by
  # another component.
  while (match(line, /[: ]0+[0-9]/))
    line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
  while (match(line, /:0[^:]/))
    line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)

  # Abbreviate weekday names.  Do not abbreviate "Sun" and "Sat", as
  # pre-2017c zic erroneously diagnoses "Su" and "Sa" as ambiguous.
  # Mon/Wed/Fri shrink to one letter, Tue/Thu to two.
  while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 4) substr(line, end - 1)
  }
  while (match(line, / (last)?(Tue|Thu)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 3) substr(line, end - 1)
  }

  # Abbreviate "max", "only" and month names.
  # Do not abbreviate "min", as pre-2017c zic erroneously diagnoses "mi"
  # as ambiguous.
  gsub(/ max /, " ma ", line)
  gsub(/ only /, " o ", line)
  gsub(/ Jan /, " Ja ", line)
  gsub(/ Feb /, " F ", line)
  gsub(/ Apr /, " Ap ", line)
  gsub(/ Aug /, " Au ", line)
  gsub(/ Sep /, " S ", line)
  gsub(/ Oct /, " O ", line)
  gsub(/ Nov /, " N ", line)
  gsub(/ Dec /, " D ", line)

  # Strip leading and trailing space.
  sub(/^ /, "", line)
  sub(/ $/, "", line)

  # Remove unnecessary trailing zero fields.
  sub(/ 0+$/, "", line)

  # Remove unnecessary trailing days-of-month "1".
  if (match(line, /[A-Za-z] 1$/))
    line = substr(line, 1, RSTART)

  # Remove unnecessary trailing " Ja" (for January).
  sub(/ Ja$/, "", line)

  n = split(line, field)

  # Abbreviate rule names.  The rule name is field 4 of a Zone line and
  # field 2 of a Rule line or of a Zone continuation line; Link lines
  # have none.  A field starting with '-', '+' or a digit is not a rule
  # name and is left alone.
  i = field[1] == "Z" ? 4 : field[1] == "Li" ? 0 : 2
  if (i && field[i] ~ /^[^-+0-9]/) {
    if (!rule[field[i]])
      rule[field[i]] = gen_rule_name()
    field[i] = rule[field[i]]
  }

  # If this zone supersedes an earlier one, delete the earlier one
  # from the saved output lines: blank its first line and every
  # continuation line (which starts with '-', '+' or a digit) after it.
  startdef = ""
  if (field[1] == "Z")
    startdef = field[2]
  else if (field[1] == "Li")
    startdef = field[3]
  if (startdef != "") {
    i = zonedef[startdef]
    if (i) {
      do {
        output_line[i - 1] = ""
      } while (output_line[i++] ~ /^[-+0-9]/);
    }
    # Remember where this definition starts.  This is recorded only for
    # the line that begins a definition, never for continuation lines,
    # so that a later supersession deletes the definition from its
    # first line onward.
    zonedef[startdef] = nout + 1
  }

  # Save the line for later output.
  line = field[1]
  for (i = 2; i <= n; i++)
    line = line " " field[i]
  output_line[nout++] = line
}

# Print every saved output line that has not been deleted (blanked).
# I is a local variable.

function output_saved_lines(i)
{
  for (i = 0; i < nout; i++)
    if (output_line[i] != "")
      print output_line[i]
}

BEGIN {
  print "# This zic input file is in the public domain."
}

# Process every line that contains something other than white space
# and comments.
/^[ \t\n\v\f\r]*[^# \t\n\v\f\r]/ {
  process_input_line($0)
}

END {
  output_saved_lines()
}