1#!/opt/tcl/bin/tclsh
2#-----------------------------------------------------------------------------
3#   Copyright (c) 1999 Jochen C. Loewer (loewerj@hotmail.com) 
4#-----------------------------------------------------------------------------
5#
6#
7#   Script to generate 'space and time optimal' C code for fixed 
8#   converting tables from Unicode to 8bit encodings (ISO-8859*,CP850...)
9#   from the Tcl 8.2 encoding files (*.enc)
10#
11#
12#
13#   The contents of this file are subject to the Mozilla Public License
14#   Version 1.1 (the "License"); you may not use this file except in
15#   compliance with the License. You may obtain a copy of the License at
16#   http://www.mozilla.org/MPL/
17#
18#   Software distributed under the License is distributed on an "AS IS"
19#   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
20#   License for the specific language governing rights and limitations
21#   under the License.
22#
23#   The Original Code is tDOM.
24#
25#   The Initial Developer of the Original Code is Jochen Loewer
26#   Portions created by Jochen Loewer are Copyright (C) 1998, 1999
27#   Jochen Loewer. All Rights Reserved.
28#
29#   Contributor(s):                                      
30#   
31#
32#   written by Jochen Loewer
33#   November, 1999
34#
35#-----------------------------------------------------------------------------
36
37
38
39#-----------------------------------------------------------------------------
40#   Log
41#
42#-----------------------------------------------------------------------------
43proc Log { message } {
44    puts stderr $message
45}
46
47
48#-----------------------------------------------------------------------------
49#   HexValue
50#
51#-----------------------------------------------------------------------------
52proc HexValue { v } {
53    return [format "0x%2X" $v]
54}
55
56
57#-----------------------------------------------------------------------------
58#   HEX
59#
60#-----------------------------------------------------------------------------
61proc HEX { v } {
62    return [format "\\%03o" $v]
63}
64
65
66#-----------------------------------------------------------------------------
67#   ReadEncodingFile
68#
69#-----------------------------------------------------------------------------
70proc ReadEncodingFile { encodingFile info_var map_var } {
71
72    upvar $info_var info $map_var map
73
74    catch { unset info }
75    catch { unset map  }
76
77    set info(max) 0
78
79    Log "Reading encoding file $encodingFile ..."
80
81    set fd [open $encodingFile r]
82
83    #--------------------------------------------------------------
84    #   read header
85    #
86    #--------------------------------------------------------------
87    set line [gets $fd]   ;# ignore comment line
88
89    set line [gets $fd]   
90
91    if {$line != "S"} {
92        error "Only single byte encodings are supported"
93    }
94    set line [gets $fd]
95    scan $line "%s %d %d" fbHex info(symbol) info(npages)
96
97    set fb [binary format H2 [string range $fbHex 2 4]]
98    binary scan $fb c info(fallback)
99 
100    #--------------------------------------------------------------
101    #   read each single mapping page
102    #
103    #--------------------------------------------------------------
104    for {set p 0} {$p < $info(npages)} {incr p} {
105 
106        set line [gets $fd]
107
108        binary scan [binary format H2 $line] c page
109
110        #----------------------------------------------------
111        #   read 16 * 16 hex number -> 256 mappings
112        #
113        #----------------------------------------------------
114        for {set l 0} {$l < 16} {incr l} {
115
116            set line [gets $fd]
117
118            for {set k 0} {$k < 16} {incr k} {
119
120                set hex [string range $line 0 3]
121                set line [string range $line 4 end]
122                binary scan [binary format H4 $hex] S from
123                set to [expr ($page << 8) + ($l * 16) + $k]
124                Log "$from -> $to"
125
126                #------------------------------
127                #   set mapping
128                #------------------------------
129                set map($from) $to
130
131                if {$from > $info(max)} {set info(max) $from}
132            }
133        }
134    }
135    close $fd
136    Log "fallback='$info(fallback)' max=$info(max) symbol=$info(symbol) npages=$info(npages)"
137    Log "Reading done."
138    Log ""
139}
140
141
142
143
144#-----------------------------------------------------------------------------
145#   BuildInitalRanges
146#
147#-----------------------------------------------------------------------------
148proc BuildInitalRanges { info_var map_var} {
149
150    upvar $info_var info $map_var map
151
152    set mode different
153    set last -1
154
155    set ranges {}
156
157    for {set from 1} {$from <= $info(max)} {incr from} {
158        if {![info exists map($from)]} {
159            set to $info(fallback)
160        } else {   
161            set to $map($from)
162        }
163        if {$mode == "identic"} {
164            if {$from == $to} {
165                set last $from
166            } else {
167                lappend ranges [list $identicStart [expr $last - $identicStart +1] {}]            
168                Log "$identicStart, $last, IDENTIC, NULL, "
169                if {$to == $info(fallback)} { 
170                    set mode fallback
171                } else {
172                    lappend ranges [list $from 1 $to]            
173                    Log "$from -> $to"
174                    set mode different
175                }
176            }
177        } elseif {$mode == "different"} {
178            if {$from == $to} {
179                set identicStart $from
180                set last         $from
181                set mode identic
182            } elseif {$to == $info(fallback)} {
183                set mode fallback
184            } else {
185                lappend ranges [list $from 1 $to]            
186                Log"$from -> $to"
187            }        
188        } else {
189            if {$to != $info(fallback)} {
190                if {$from == $to} {
191                    set identicStart $from
192                    set last         $from
193                    set mode identic
194                } else {
195                    lappend ranges [list $from 1 $to]            
196                    Log "$from -> $to"
197                }        
198            }
199        } 
200    }
201    if {$mode == "identic"} {
202        lappend ranges [list $identicStart [expr $last - $identicStart +1] {}]            
203        Log "$identicStart, $last, IDENTIC, NULL, "
204    }
205    return $ranges
206}
207
208
209#-----------------------------------------------------------------------------
210#   OptimizeRanges
211#
212#-----------------------------------------------------------------------------
213proc OptimizeRanges { fallback ranges } {
214
215    set newranges {}
216    set lastfrom  {}
217
218    foreach range $ranges {
219        foreach {from len values} $range break
220
221        if {($len > 50) && ($values == {}) } {
222            if {$lastfrom != {} } {
223                lappend newranges [list $lastfrom $lastlen $lastvalues]
224            }
225            lappend newranges [list $from $len $values]
226            set lastfrom {}
227        } elseif {$lastfrom != {} } {
228            #Log "lastfrom=$lastfrom lastlen=$lastlen"
229            if { ($lastfrom + $lastlen + 20) > $from} {
230 
231                if {$lastvalues == {}} {
232                    for {set j 0} {$j < $lastlen} {incr j} {
233                        lappend lastvalues [expr $lastfrom + $j]
234                    }
235                    incr lastlen $lastlen
236                }
237                for {set i [expr $lastfrom + $lastlen]} {$i < $from} {incr i} { 
238                    lappend lastvalues $fallback
239                    incr lastlen 
240                }
241                if {$values == {}} {
242                    for {set j 0} {$j < $len} {incr j} {
243                        lappend lastvalues [expr $from + $j]
244                    }
245                    incr lastlen $len
246                } else {
247                    set lastvalues [concat $lastvalues $values]
248                    incr lastlen $len
249                }
250            } else {
251                lappend newranges [list $lastfrom $lastlen $lastvalues]
252                set lastfrom   $from
253                set lastlen    $len
254                set lastvalues $values
255            }
256        } else {
257            set lastfrom   $from
258            set lastlen    $len
259            set lastvalues $values
260        }
261    }
262    if {$lastfrom != {} } {
263        lappend newranges [list $lastfrom $lastlen $lastvalues]
264    }
265    return $newranges
266}
267
268
269#-----------------------------------------------------------------------------
270#   OutputCode
271#
272#-----------------------------------------------------------------------------
273proc OutputCode { encVar fallback ranges } {   
274
275    puts "static TEncodingRule TDOM_UnicodeTo$encVar \[\] = \{"
276
277    foreach range $ranges {
278        foreach {from len values} $range break
279        if {$values == {}} {
280            puts "    \{ ENC_IDENTITY, $from, $len, \"\" \}, "
281        } else {
282            puts "    \{ ENC_MAP, $from, $len, "
283            set i 0
284            foreach value $values {
285                if {$i == 0} { 
286                    puts -nonewline "        \""
287                }
288                puts -nonewline "[HEX $value]"
289                incr i
290                if {$i == 14} { 
291                    puts -nonewline "\"\n"
292                    set i 0 
293                }
294            }
295            if {$i > 0} {
296                puts  -nonewline "\" \},\n"
297            } else {
298                puts  -nonewline "    \},\n"
299            }
300        }
301    }
302    puts "    \{ ENC_END, 0, 0, NULL \} "
303    puts "\};\n"
304}
305
306
307
308#-----------------------------------------------------------------------------
309#   begin of main part
310#-----------------------------------------------------------------------------
311
312
313  puts "/*------------------------------------------------------------------------"
314  puts "|   WARNING! This is file automatically generated by GenCompactCodings !  "
315  puts "|   WARNING!         Do not edit!                                         "
316  puts "|                                                                         "
317  puts "|   Unicode(UTF) ---> 8bit code conversion tables                         "
318  puts "|                                                                         "
319  puts "\\-----------------------------------------------------------------------*/"
320
321
322  set fallbacks {}
323  set encodings {}
324
325  foreach encodingFile $argv {
326
327      regsub {(\.enc)$} $encodingFile {} encoding
328      set encVar [string toupper $encoding]
329      regsub -- {-} $encVar {} encVar
330     
331      ReadEncodingFile $encodingFile info map
332
333      foreach from [lsort -integer [array names map]] {
334          Log "$from -> $map($from)"
335      }
336
337      #-------------------------------------------
338      #   build the initial map ranges
339      #-------------------------------------------
340      set ranges [ BuildInitalRanges info map ]
341
342      Log "Starting ranges [llength $ranges]:"
343      foreach range $ranges {
344          foreach {from len values} $range break
345          Log [format "%3d %3d '%s'" $from $len $values]
346      }
347
348      #-------------------------------------------
349      #   iterate to optimize ranges
350      #-------------------------------------------
351      for {set loop 0} {$loop < 4} {incr loop} {
352          set ranges [OptimizeRanges $info(fallback) $ranges]
353      }
354
355      Log "End ranges [llength $ranges]:"
356      foreach range $ranges {
357          foreach {from len values} $range break
358          Log [format "%3d %3d '%s'\n" $from $len $values]
359      }
360
361      lappend fallbacks $info(fallback)  
362      lappend encodings $encoding $encVar  
363
364      OutputCode $encVar $info(fallback) $ranges
365  }
366
367  puts ""
368  puts "static TEncoding TDOM_UnicodeTo8bitEncodings \[\] = \{"
369  foreach {encoding encVar} $encodings fallback $fallbacks {
370      puts stdout  [format "    { %-12s, %4s, %s }," \
371                           "\"$encoding\""           \
372                           [HexValue $fallback]      \
373                           TDOM_UnicodeTo$encVar     ]
374     
375  }
376  puts "    { NULL, 0, NULL }"
377  puts "\};"
378
379
380#-----------------------------------------------------------------------------
381#   end of main part
382#-----------------------------------------------------------------------------
383
384