1#----------------------------------------------------------------------------
2#   Copyright (c) 1999,2000 Jochen Loewer (loewerj@hotmail.com) et al.
3#----------------------------------------------------------------------------
4#
5#   Rcsid: @(#)$Id: tdomhtml.tcl,v 1.2 2003/04/20 10:50:00 rolf Exp $
6#
7#   Implements simple HTML layer on top of core DOM Level-1 specification,
8#   as implemented in tDOM package.
9#
10#   The contents of this file are subject to the Mozilla Public License
11#   Version 1.1 (the "License"); you may not use this file except in
12#   compliance with the License. You may obtain a copy of the License at
13#   http://www.mozilla.org/MPL/
14#
15#   Software distributed under the License is distributed on an "AS IS"
16#   basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
17#   License for the specific language governing rights and limitations
18#   under the License.
19#
20#   The Original Code is tDOM.
21#   The Initial Developer of the Original Code is Jochen Loewer.
22#
23#   Portions created by Jochen Loewer are Copyright (C) 1998, 1999
24#   Jochen Loewer. All Rights Reserved.
25#
26#   Portions created by Zoran Vasiljevic are Copyright (C) 2000-2002
27#   Zoran Vasiljevic. All Rights Reserved.
28#
29#   Portions created by Rolf Ade are Copyright (C) 1999-2002
30#   Rolf Ade. All Rights Reserved.
31
32#   Contributor(s):
33#
34#       3 Apr 2000   Zoran Vasiljevic (zoran@v-connect.com)
35#                    Initial idea
36#
37#      20 Oct 2002   Rolf Ade (rolf@pointsman.de)
38#                    Suggestion to rewrite with new tdom :)
39#
40#      23 Oct 2002   Zoran Vasiljevic (zoran@archiware.com)
41#                    Rewritten from scratch using new tdom.
42#
43#   Written by Zoran Vasiljevic
44#   April, 2000
45#
46#----------------------------------------------------------------------------
47
48#
49# This package requires the loaded tdom
50# so bark early if we can't find it.
51#
52
53package require tdom
54
55#
56# Caller (usually our pkgIndex.tcl loader) will supply the package
57# version by defining the _V_ variable before sourcing this file.
58# For all other cases, we just provide the no-version package.
59#
60
61if {[info exists _V_] == 0} {
62    package provide tdomhtml
63} else {
64    package provide tdomhtml $_V_
65}
66
67#
68# Declare HTML generating commands
69#
70
71namespace eval ::dom::domHTML {
72
73    #
74    # Create commands for generating HTML tags. This is a complete
75    # set taken from http://www.w3.org/TR/html4/index/elements.html
76    #
77
78    variable elementNodeCmd {
79        a
80        abbr
81        acronym
82        address
83        applet
84        area
85        b
86        base
87        basefont
88        bdo
89        big
90        blockquote
91        body
92        br
93        button
94        caption
95        center
96        cite
97        code
98        col
99        colgroup
100        dd
101        del
102        dfn
103        dir
104        div
105        dl
106        dt
107        em
108        fieldset
109        font
110        form
111        frame
112        frameset
113        h1
114        h2
115        h3
116        h4
117        h5
118        h6
119        head
120        hr
121        html
122        i
123        iframe
124        img
125        input
126        ins
127        isindex
128        kbd
129        label
130        legend
131        li
132        link
133        map
134        menu
135        meta
136        noframes
137        noscript
138        object
139        ol
140        optgroup
141        option
142        p
143        param
144        pre
145        q
146        s
147        samp
148        script
149        select
150        small
151        span
152        strike
153        strong
154        style
155        sub
156        sup
157        table
158        tbody
159        td
160        textarea
161        tfoot
162        th
163        thead
164        title
165        tr
166        tt
167        u
168        ul
169        var
170    }
171
172    foreach nodecmd $elementNodeCmd {
173        dom createNodeCmd elementNode $nodecmd
174    }
175
176    #
177    # Miscelaneous commands. Not part of HTML specs but needed
178    # for generation of special DOM nodes.
179    #
180
181    variable textNodeCmd t
182    dom createNodeCmd textNode $textNodeCmd
183
184    variable commentNodeCmd c
185    dom createNodeCmd commentNode $commentNodeCmd
186}
187
188#-----------------------------------------------------------------------------
189# ::dom::domHTML::newdoc --
190#
191# Creates the HTML document and fils it with content.
192# Note: script is evaluated in the context of ::dom::domHTML namespace.
193#-----------------------------------------------------------------------------
194
195proc ::dom::domHTML::newdoc {script {upvars {}}} {
196
197    foreach name $upvars { upvar $name $name }
198
199    set doc [dom createDocument html]
200    [$doc documentElement] appendFromScript $script
201
202    return $doc
203}
204
205#-----------------------------------------------------------------------------
206# ::dom::domHTML::putdoc --
207#
208# Convenience wrapper to serialize the document to the output channel
209#-----------------------------------------------------------------------------
210
211proc ::dom::domHTML::putdoc {doc chan} {
212
213    [$doc documentElement] asHTML -channel $chan
214}
215
216#-----------------------------------------------------------------------------
217# ::dom::domHTML::deldoc --
218#
219# Convenience wrapper to dispose the html document
220#-----------------------------------------------------------------------------
221
222proc ::dom::domHTML::deldoc {doc} {
223
224    $doc delete
225}
226
227#-----------------------------------------------------------------------------
228# ::dom::domHTML::html2tcl --
229#
230# Parses the html file and creates a Tcl script usable for passing
231# to the ::dom::domHTML::newdoc command.
232#-----------------------------------------------------------------------------
233
234proc ::dom::domHTML::html2tcl {htmlfile {outfile ""}} {
235
236    #
237    # Slurp-in the entire html file
238    #
239
240    set ichan [open $htmlfile]
241    set html  [read $ichan]
242    close $ichan
243
244    #
245    # Create in-memory DOM tree by parsing
246    # the html content with the built-in
247    # tdom html parser.
248    #
249
250    dom parse -html $html doc
251
252    #
253    # Open output file and recursively
254    # format all elements found there.
255    #
256
257    if {$outfile == ""} {
258        set outfile [file root $htmlfile].tcl
259    }
260
261    set ochan [open $outfile w]
262    _2tcl [$doc documentElement] $ochan
263    close $ochan
264}
265
266#-----------------------------------------------------------------------------
267# ::dom::domHTML::_2tcl --
268#
269# Helper procedure for recursively parsing the html tag
270#-----------------------------------------------------------------------------
271
272proc ::dom::domHTML::_2tcl {top ochan {indent 2} {offset 0}} {
273
274    variable commentNodeCmd
275    variable textNodeCmd
276    variable elementNodeCmd
277
278    set space [string repeat " " $offset]
279
280    foreach child [$top childNodes] {
281        switch -- [$child nodeType] {
282            ELEMENT_NODE {
283
284                # Emit the nodename as html command
285                # and create node command if missing
286                set nodecmd [string tolower [$child nodeName]]
287                if {[lsearch $elementNodeCmd $name] == -1} {
288                    dom createNodeCmd elementNode $nodecmd
289                }
290                puts -nonewline $ochan $space
291                puts -nonewline $ochan $nodecmd
292
293                # Emit node attributes as key/value pairs
294                foreach att [$child attributes] {
295                    puts -nonewline $ochan " "
296                    puts -nonewline $ochan [string tolower $att]
297                    puts -nonewline $ochan " "
298                    set val [_entityesc [$child getAttribute $att]]
299                    if {[regexp { } $val]} {
300                        puts -nonewline $ochan \"$val\"
301                    } else {
302                        puts -nonewline $ochan $val
303                    }
304                }
305
306                # Recurse to child nodes
307                if {[llength [$child childNodes]]} {
308                    puts $ochan " {"
309                    _2tcl $child $ochan $indent [expr {$offset+$indent}]
310                    puts -nonewline $ochan $space
311                    puts $ochan "}"
312                } else {
313                    puts $ochan ""
314                }
315            }
316            TEXT_NODE - CDATA_SECTION_NODE {
317
318                # Escape contents of text nodes
319                puts -nonewline $ochan $space
320                puts -nonewline $ochan "$textNodeCmd {"
321                puts -nonewline $ochan [_entityesc [$child nodeValue]]
322                puts $ochan "}"
323            }
324            COMMENT_NODE {
325
326                # Pass contents of comment nodes as-is
327                puts -nonewline $ochan $space
328                puts -nonewline $ochan "$commentNodeCmd {"
329                puts -nonewline $ochan [$child nodeValue]
330                puts $ochan "}"
331            }
332        }
333    }
334}
335
336#-----------------------------------------------------------------------------
337# ::dom::domHTML::_entityesc --
338#
339# Helper procedure for entity escaping
340#-----------------------------------------------------------------------------
341
342proc ::dom::domHTML::_entityesc {string} {
343
344    regsub -all {(&[^;]+;)}  $string {\\\1} string
345    regsub -all {([\#\[\]])} $string {\\\1} string
346
347    return $string
348}
349
350#-----------------------------------------------------------------------------
351# Short usage example.
352#
353#-----------------------------------------------------------------------------
354
355if {0} {
356    set doc [dom::domHTML::newdoc {
357        title {t "Test document generated with tDOM"}
358        body {
359            table border 1 width 100 {
360                for {set i 0} {$i < 5} {incr i} {
361                    tr {
362                        td {
363                            i {
364                                t "italic $i and "
365                                b {t "italic-bold $i"}
366                            }
367                        }
368                    }
369                }
370            }
371        }
372    }]
373
374    dom::domHTML::putdoc $doc stdout
375    dom::domHTML::deldoc $doc
376}
377
378# - EOF -
379