1#!/opt/tcl/bin/tclsh 2#----------------------------------------------------------------------------- 3# Copyright (c) 1999 Jochen C. Loewer (loewerj@hotmail.com) 4#----------------------------------------------------------------------------- 5# 6# 7# Script to generate 'space and time optimal' C code for fixed 8# converting tables from Unicode to 8bit encodings (ISO-8859*,CP850...) 9# from the Tcl 8.2 encoding files (*.enc) 10# 11# 12# 13# The contents of this file are subject to the Mozilla Public License 14# Version 1.1 (the "License"); you may not use this file except in 15# compliance with the License. You may obtain a copy of the License at 16# http://www.mozilla.org/MPL/ 17# 18# Software distributed under the License is distributed on an "AS IS" 19# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 20# License for the specific language governing rights and limitations 21# under the License. 22# 23# The Original Code is tDOM. 24# 25# The Initial Developer of the Original Code is Jochen Loewer 26# Portions created by Jochen Loewer are Copyright (C) 1998, 1999 27# Jochen Loewer. All Rights Reserved. 28# 29# Contributor(s): 30# 31# 32# written by Jochen Loewer 33# November, 1999 34# 35#----------------------------------------------------------------------------- 36 37 38 39#----------------------------------------------------------------------------- 40# Log 41# 42#----------------------------------------------------------------------------- 43proc Log { message } { 44 puts stderr $message 45} 46 47 48#----------------------------------------------------------------------------- 49# HexValue 50# 51#----------------------------------------------------------------------------- 52proc HexValue { v } { 53 return [format "0x%2X" $v] 54} 55 56 57#----------------------------------------------------------------------------- 58# HEX 59# 60#----------------------------------------------------------------------------- 61proc HEX { v } { 62 return [format "\\%03o" $v] 63} 64 65 66#----------------------------------------------------------------------------- 67# ReadEncodingFile 68# 69#----------------------------------------------------------------------------- 70proc ReadEncodingFile { encodingFile info_var map_var } { 71 72 upvar $info_var info $map_var map 73 74 catch { unset info } 75 catch { unset map } 76 77 set info(max) 0 78 79 Log "Reading encoding file $encodingFile ..." 80 81 set fd [open $encodingFile r] 82 83 #-------------------------------------------------------------- 84 # read header 85 # 86 #-------------------------------------------------------------- 87 set line [gets $fd] ;# ignore comment line 88 89 set line [gets $fd] 90 91 if {$line != "S"} { 92 error "Only single byte encodings are supported" 93 } 94 set line [gets $fd] 95 scan $line "%s %d %d" fbHex info(symbol) info(npages) 96 97 set fb [binary format H2 [string range $fbHex 2 4]] 98 binary scan $fb c info(fallback) 99 100 #-------------------------------------------------------------- 101 # read each single mapping page 102 # 103 #-------------------------------------------------------------- 104 for {set p 0} {$p < $info(npages)} {incr p} { 105 106 set line [gets $fd] 107 108 binary scan [binary format H2 $line] c page 109 110 #---------------------------------------------------- 111 # read 16 * 16 hex number -> 256 mappings 112 # 113 #---------------------------------------------------- 114 for {set l 0} {$l < 16} {incr l} { 115 116 set line [gets $fd] 117 118 for {set k 0} {$k < 16} {incr k} { 119 120 set hex [string range $line 0 3] 121 set line [string range $line 4 end] 122 binary scan [binary format H4 $hex] S from 123 set to [expr ($page << 8) + ($l * 16) + $k] 124 Log "$from -> $to" 125 126 #------------------------------ 127 # set mapping 128 #------------------------------ 129 set map($from) $to 130 131 if {$from > $info(max)} {set info(max) $from} 132 } 133 } 134 } 135 close $fd 136 Log "fallback='$info(fallback)' max=$info(max) symbol=$info(symbol) npages=$info(npages)" 137 Log "Reading done." 138 Log "" 139} 140 141 142 143 144#----------------------------------------------------------------------------- 145# BuildInitalRanges 146# 147#----------------------------------------------------------------------------- 148proc BuildInitalRanges { info_var map_var} { 149 150 upvar $info_var info $map_var map 151 152 set mode different 153 set last -1 154 155 set ranges {} 156 157 for {set from 1} {$from <= $info(max)} {incr from} { 158 if {![info exists map($from)]} { 159 set to $info(fallback) 160 } else { 161 set to $map($from) 162 } 163 if {$mode == "identic"} { 164 if {$from == $to} { 165 set last $from 166 } else { 167 lappend ranges [list $identicStart [expr $last - $identicStart +1] {}] 168 Log "$identicStart, $last, IDENTIC, NULL, " 169 if {$to == $info(fallback)} { 170 set mode fallback 171 } else { 172 lappend ranges [list $from 1 $to] 173 Log "$from -> $to" 174 set mode different 175 } 176 } 177 } elseif {$mode == "different"} { 178 if {$from == $to} { 179 set identicStart $from 180 set last $from 181 set mode identic 182 } elseif {$to == $info(fallback)} { 183 set mode fallback 184 } else { 185 lappend ranges [list $from 1 $to] 186 Log"$from -> $to" 187 } 188 } else { 189 if {$to != $info(fallback)} { 190 if {$from == $to} { 191 set identicStart $from 192 set last $from 193 set mode identic 194 } else { 195 lappend ranges [list $from 1 $to] 196 Log "$from -> $to" 197 } 198 } 199 } 200 } 201 if {$mode == "identic"} { 202 lappend ranges [list $identicStart [expr $last - $identicStart +1] {}] 203 Log "$identicStart, $last, IDENTIC, NULL, " 204 } 205 return $ranges 206} 207 208 209#----------------------------------------------------------------------------- 210# OptimizeRanges 211# 212#----------------------------------------------------------------------------- 213proc OptimizeRanges { fallback ranges } { 214 215 set newranges {} 216 set lastfrom {} 217 218 foreach range $ranges { 219 foreach {from len values} $range break 220 221 if {($len > 50) && ($values == {}) } { 222 if {$lastfrom != {} } { 223 lappend newranges [list $lastfrom $lastlen $lastvalues] 224 } 225 lappend newranges [list $from $len $values] 226 set lastfrom {} 227 } elseif {$lastfrom != {} } { 228 #Log "lastfrom=$lastfrom lastlen=$lastlen" 229 if { ($lastfrom + $lastlen + 20) > $from} { 230 231 if {$lastvalues == {}} { 232 for {set j 0} {$j < $lastlen} {incr j} { 233 lappend lastvalues [expr $lastfrom + $j] 234 } 235 incr lastlen $lastlen 236 } 237 for {set i [expr $lastfrom + $lastlen]} {$i < $from} {incr i} { 238 lappend lastvalues $fallback 239 incr lastlen 240 } 241 if {$values == {}} { 242 for {set j 0} {$j < $len} {incr j} { 243 lappend lastvalues [expr $from + $j] 244 } 245 incr lastlen $len 246 } else { 247 set lastvalues [concat $lastvalues $values] 248 incr lastlen $len 249 } 250 } else { 251 lappend newranges [list $lastfrom $lastlen $lastvalues] 252 set lastfrom $from 253 set lastlen $len 254 set lastvalues $values 255 } 256 } else { 257 set lastfrom $from 258 set lastlen $len 259 set lastvalues $values 260 } 261 } 262 if {$lastfrom != {} } { 263 lappend newranges [list $lastfrom $lastlen $lastvalues] 264 } 265 return $newranges 266} 267 268 269#----------------------------------------------------------------------------- 270# OutputCode 271# 272#----------------------------------------------------------------------------- 273proc OutputCode { encVar fallback ranges } { 274 275 puts "static TEncodingRule TDOM_UnicodeTo$encVar \[\] = \{" 276 277 foreach range $ranges { 278 foreach {from len values} $range break 279 if {$values == {}} { 280 puts " \{ ENC_IDENTITY, $from, $len, \"\" \}, " 281 } else { 282 puts " \{ ENC_MAP, $from, $len, " 283 set i 0 284 foreach value $values { 285 if {$i == 0} { 286 puts -nonewline " \"" 287 } 288 puts -nonewline "[HEX $value]" 289 incr i 290 if {$i == 14} { 291 puts -nonewline "\"\n" 292 set i 0 293 } 294 } 295 if {$i > 0} { 296 puts -nonewline "\" \},\n" 297 } else { 298 puts -nonewline " \},\n" 299 } 300 } 301 } 302 puts " \{ ENC_END, 0, 0, NULL \} " 303 puts "\};\n" 304} 305 306 307 308#----------------------------------------------------------------------------- 309# begin of main part 310#----------------------------------------------------------------------------- 311 312 313 puts "/*------------------------------------------------------------------------" 314 puts "| WARNING! This is file automatically generated by GenCompactCodings ! " 315 puts "| WARNING! Do not edit! " 316 puts "| " 317 puts "| Unicode(UTF) ---> 8bit code conversion tables " 318 puts "| " 319 puts "\\-----------------------------------------------------------------------*/" 320 321 322 set fallbacks {} 323 set encodings {} 324 325 foreach encodingFile $argv { 326 327 regsub {(\.enc)$} $encodingFile {} encoding 328 set encVar [string toupper $encoding] 329 regsub -- {-} $encVar {} encVar 330 331 ReadEncodingFile $encodingFile info map 332 333 foreach from [lsort -integer [array names map]] { 334 Log "$from -> $map($from)" 335 } 336 337 #------------------------------------------- 338 # build the initial map ranges 339 #------------------------------------------- 340 set ranges [ BuildInitalRanges info map ] 341 342 Log "Starting ranges [llength $ranges]:" 343 foreach range $ranges { 344 foreach {from len values} $range break 345 Log [format "%3d %3d '%s'" $from $len $values] 346 } 347 348 #------------------------------------------- 349 # iterate to optimize ranges 350 #------------------------------------------- 351 for {set loop 0} {$loop < 4} {incr loop} { 352 set ranges [OptimizeRanges $info(fallback) $ranges] 353 } 354 355 Log "End ranges [llength $ranges]:" 356 foreach range $ranges { 357 foreach {from len values} $range break 358 Log [format "%3d %3d '%s'\n" $from $len $values] 359 } 360 361 lappend fallbacks $info(fallback) 362 lappend encodings $encoding $encVar 363 364 OutputCode $encVar $info(fallback) $ranges 365 } 366 367 puts "" 368 puts "static TEncoding TDOM_UnicodeTo8bitEncodings \[\] = \{" 369 foreach {encoding encVar} $encodings fallback $fallbacks { 370 puts stdout [format " { %-12s, %4s, %s }," \ 371 "\"$encoding\"" \ 372 [HexValue $fallback] \ 373 TDOM_UnicodeTo$encVar ] 374 375 } 376 puts " { NULL, 0, NULL }" 377 puts "\};" 378 379 380#----------------------------------------------------------------------------- 381# end of main part 382#----------------------------------------------------------------------------- 383 384