1#---------------------------------------------------------------------------- 2# Copyright (c) 1999,2000 Jochen Loewer (loewerj@hotmail.com) et al. 3#---------------------------------------------------------------------------- 4# 5# Rcsid: @(#)$Id: tdomhtml.tcl,v 1.2 2003/04/20 10:50:00 rolf Exp $ 6# 7# Implements simple HTML layer on top of core DOM Level-1 specification, 8# as implemented in tDOM package. 9# 10# The contents of this file are subject to the Mozilla Public License 11# Version 1.1 (the "License"); you may not use this file except in 12# compliance with the License. You may obtain a copy of the License at 13# http://www.mozilla.org/MPL/ 14# 15# Software distributed under the License is distributed on an "AS IS" 16# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 17# License for the specific language governing rights and limitations 18# under the License. 19# 20# The Original Code is tDOM. 21# The Initial Developer of the Original Code is Jochen Loewer. 22# 23# Portions created by Jochen Loewer are Copyright (C) 1998, 1999 24# Jochen Loewer. All Rights Reserved. 25# 26# Portions created by Zoran Vasiljevic are Copyright (C) 2000-2002 27# Zoran Vasiljevic. All Rights Reserved. 28# 29# Portions created by Rolf Ade are Copyright (C) 1999-2002 30# Rolf Ade. All Rights Reserved. 31 32# Contributor(s): 33# 34# 3 Apr 2000 Zoran Vasiljevic (zoran@v-connect.com) 35# Initial idea 36# 37# 20 Oct 2002 Rolf Ade (rolf@pointsman.de) 38# Suggestion to rewrite with new tdom :) 39# 40# 23 Oct 2002 Zoran Vasiljevic (zoran@archiware.com) 41# Rewritten from scratch using new tdom. 42# 43# Written by Zoran Vasiljevic 44# April, 2000 45# 46#---------------------------------------------------------------------------- 47 48# 49# This package requires the loaded tdom 50# so bark early if we can't find it. 51# 52 53package require tdom 54 55# 56# Caller (usually our pkgIndex.tcl loader) will supply the package 57# version by defining the _V_ variable before sourcing this file. 58# For all other cases, we just provide the no-version package. 59# 60 61if {[info exists _V_] == 0} { 62 package provide tdomhtml 63} else { 64 package provide tdomhtml $_V_ 65} 66 67# 68# Declare HTML generating commands 69# 70 71namespace eval ::dom::domHTML { 72 73 # 74 # Create commands for generating HTML tags. This is a complete 75 # set taken from http://www.w3.org/TR/html4/index/elements.html 76 # 77 78 variable elementNodeCmd { 79 a 80 abbr 81 acronym 82 address 83 applet 84 area 85 b 86 base 87 basefont 88 bdo 89 big 90 blockquote 91 body 92 br 93 button 94 caption 95 center 96 cite 97 code 98 col 99 colgroup 100 dd 101 del 102 dfn 103 dir 104 div 105 dl 106 dt 107 em 108 fieldset 109 font 110 form 111 frame 112 frameset 113 h1 114 h2 115 h3 116 h4 117 h5 118 h6 119 head 120 hr 121 html 122 i 123 iframe 124 img 125 input 126 ins 127 isindex 128 kbd 129 label 130 legend 131 li 132 link 133 map 134 menu 135 meta 136 noframes 137 noscript 138 object 139 ol 140 optgroup 141 option 142 p 143 param 144 pre 145 q 146 s 147 samp 148 script 149 select 150 small 151 span 152 strike 153 strong 154 style 155 sub 156 sup 157 table 158 tbody 159 td 160 textarea 161 tfoot 162 th 163 thead 164 title 165 tr 166 tt 167 u 168 ul 169 var 170 } 171 172 foreach nodecmd $elementNodeCmd { 173 dom createNodeCmd elementNode $nodecmd 174 } 175 176 # 177 # Miscelaneous commands. Not part of HTML specs but needed 178 # for generation of special DOM nodes. 179 # 180 181 variable textNodeCmd t 182 dom createNodeCmd textNode $textNodeCmd 183 184 variable commentNodeCmd c 185 dom createNodeCmd commentNode $commentNodeCmd 186} 187 188#----------------------------------------------------------------------------- 189# ::dom::domHTML::newdoc -- 190# 191# Creates the HTML document and fils it with content. 192# Note: script is evaluated in the context of ::dom::domHTML namespace. 193#----------------------------------------------------------------------------- 194 195proc ::dom::domHTML::newdoc {script {upvars {}}} { 196 197 foreach name $upvars { upvar $name $name } 198 199 set doc [dom createDocument html] 200 [$doc documentElement] appendFromScript $script 201 202 return $doc 203} 204 205#----------------------------------------------------------------------------- 206# ::dom::domHTML::putdoc -- 207# 208# Convenience wrapper to serialize the document to the output channel 209#----------------------------------------------------------------------------- 210 211proc ::dom::domHTML::putdoc {doc chan} { 212 213 [$doc documentElement] asHTML -channel $chan 214} 215 216#----------------------------------------------------------------------------- 217# ::dom::domHTML::deldoc -- 218# 219# Convenience wrapper to dispose the html document 220#----------------------------------------------------------------------------- 221 222proc ::dom::domHTML::deldoc {doc} { 223 224 $doc delete 225} 226 227#----------------------------------------------------------------------------- 228# ::dom::domHTML::html2tcl -- 229# 230# Parses the html file and creates a Tcl script usable for passing 231# to the ::dom::domHTML::newdoc command. 232#----------------------------------------------------------------------------- 233 234proc ::dom::domHTML::html2tcl {htmlfile {outfile ""}} { 235 236 # 237 # Slurp-in the entire html file 238 # 239 240 set ichan [open $htmlfile] 241 set html [read $ichan] 242 close $ichan 243 244 # 245 # Create in-memory DOM tree by parsing 246 # the html content with the built-in 247 # tdom html parser. 248 # 249 250 dom parse -html $html doc 251 252 # 253 # Open output file and recursively 254 # format all elements found there. 255 # 256 257 if {$outfile == ""} { 258 set outfile [file root $htmlfile].tcl 259 } 260 261 set ochan [open $outfile w] 262 _2tcl [$doc documentElement] $ochan 263 close $ochan 264} 265 266#----------------------------------------------------------------------------- 267# ::dom::domHTML::_2tcl -- 268# 269# Helper procedure for recursively parsing the html tag 270#----------------------------------------------------------------------------- 271 272proc ::dom::domHTML::_2tcl {top ochan {indent 2} {offset 0}} { 273 274 variable commentNodeCmd 275 variable textNodeCmd 276 variable elementNodeCmd 277 278 set space [string repeat " " $offset] 279 280 foreach child [$top childNodes] { 281 switch -- [$child nodeType] { 282 ELEMENT_NODE { 283 284 # Emit the nodename as html command 285 # and create node command if missing 286 set nodecmd [string tolower [$child nodeName]] 287 if {[lsearch $elementNodeCmd $name] == -1} { 288 dom createNodeCmd elementNode $nodecmd 289 } 290 puts -nonewline $ochan $space 291 puts -nonewline $ochan $nodecmd 292 293 # Emit node attributes as key/value pairs 294 foreach att [$child attributes] { 295 puts -nonewline $ochan " " 296 puts -nonewline $ochan [string tolower $att] 297 puts -nonewline $ochan " " 298 set val [_entityesc [$child getAttribute $att]] 299 if {[regexp { } $val]} { 300 puts -nonewline $ochan \"$val\" 301 } else { 302 puts -nonewline $ochan $val 303 } 304 } 305 306 # Recurse to child nodes 307 if {[llength [$child childNodes]]} { 308 puts $ochan " {" 309 _2tcl $child $ochan $indent [expr {$offset+$indent}] 310 puts -nonewline $ochan $space 311 puts $ochan "}" 312 } else { 313 puts $ochan "" 314 } 315 } 316 TEXT_NODE - CDATA_SECTION_NODE { 317 318 # Escape contents of text nodes 319 puts -nonewline $ochan $space 320 puts -nonewline $ochan "$textNodeCmd {" 321 puts -nonewline $ochan [_entityesc [$child nodeValue]] 322 puts $ochan "}" 323 } 324 COMMENT_NODE { 325 326 # Pass contents of comment nodes as-is 327 puts -nonewline $ochan $space 328 puts -nonewline $ochan "$commentNodeCmd {" 329 puts -nonewline $ochan [$child nodeValue] 330 puts $ochan "}" 331 } 332 } 333 } 334} 335 336#----------------------------------------------------------------------------- 337# ::dom::domHTML::_entityesc -- 338# 339# Helper procedure for entity escaping 340#----------------------------------------------------------------------------- 341 342proc ::dom::domHTML::_entityesc {string} { 343 344 regsub -all {(&[^;]+;)} $string {\\\1} string 345 regsub -all {([\#\[\]])} $string {\\\1} string 346 347 return $string 348} 349 350#----------------------------------------------------------------------------- 351# Short usage example. 352# 353#----------------------------------------------------------------------------- 354 355if {0} { 356 set doc [dom::domHTML::newdoc { 357 title {t "Test document generated with tDOM"} 358 body { 359 table border 1 width 100 { 360 for {set i 0} {$i < 5} {incr i} { 361 tr { 362 td { 363 i { 364 t "italic $i and " 365 b {t "italic-bold $i"} 366 } 367 } 368 } 369 } 370 } 371 } 372 }] 373 374 dom::domHTML::putdoc $doc stdout 375 dom::domHTML::deldoc $doc 376} 377 378# - EOF - 379